"""Make figure showing yearly rate of new novel publications.

Numbers are normalized per million persons.

When numbers are known, they are shown. When they are not known, model estimates
are shown.

"""
import argparse
import hashlib
import os
import pickle

import matplotlib.pyplot as plt
import matplotlib.style
import matplotlib.ticker
import numpy as np
import pandas as pd
import scipy.special

import datasets
import inference
import make_table_novels_by_year  # function called likely belongs in datasets
import plot_common

# Command-line interface: the single positional argument is the path the
# figure is written to.
parser = argparse.ArgumentParser()
parser.add_argument('output_filename', help='Output path for figure.')

# Use the seaborn "deep" style globally. Matplotlib 3.6 renamed the bundled
# seaborn style sheets to "seaborn-v0_8-*" and deprecated the old names, so
# prefer the new name whenever this installation lists it and fall back to the
# historical name on older releases.
_STYLE_NAME = (
    'seaborn-v0_8-deep'
    if 'seaborn-v0_8-deep' in matplotlib.style.available
    else 'seaborn-deep'
)
matplotlib.style.use(_STYLE_NAME)


def _print_growth_summary(df, population):
    """Print the mean annual percent change in novels per capita for three periods.

    The per-capita series uses the ``novels`` column for 1820-1836 and the
    ``y_sim_p50`` column for 1837-1900, each divided by ``population`` for the
    same years.
    """
    # calculate important summary statistics here, for lack of a better place to do it.
    per_capita_1820_1836 = df.loc[1820:1836, 'novels'] / population.loc[1820:1836]
    per_capita_1837_1900 = df.loc[1837:1900, 'y_sim_p50'] / population.loc[1837:1900]
    # compute the year-over-year change once and slice it per period
    annual_change = pd.concat([per_capita_1820_1836, per_capita_1837_1900]).pct_change()
    for first_year, last_year in ((1821, 1837), (1840, 1855), (1855, 1900)):
        print(f'average annual percent change, {first_year}-{last_year}',
              annual_change.loc[first_year:last_year].mean())


def make_plot(output_filename):
    """Plot new novels per million persons for 1820-1919 and save the figure.

    For 1820-1836 the ``novels`` column is drawn as one dot per year. For
    1837-1919 two shaded bars are drawn per year from the ``y_sim_*`` columns:
    a narrow bar spanning p05-p95 and a wider bar spanning p25-p75. The y axis
    is logarithmic with its upper limit fixed at 100.

    Summary statistics are printed to stdout as a side effect, followed by a
    confirmation line once the figure has been written.

    Args:
        output_filename: Path passed to ``Figure.savefig``.

    Raises:
        AssertionError: If the yearly table does not have one row for each
            year from 1789 through 1919.
    """
    df = make_table_novels_by_year.dataset_years()
    # The table is expected to cover 1789-1919; only 1820-1919 is plotted
    # because population figures are divided in for those years.
    assert len(df) == (1919 - 1789 + 1), f'expected 131 yearly rows, got {len(df)}'
    population = datasets._population_british_isles()

    _print_growth_summary(df, population)

    fig, ax = plt.subplots()
    try:
        # default for set_yscale is log10
        ax.set_yscale('log')

        # 1820-1836: one marker per year, no connecting line
        per_million = df.loc[1820:1836, 'novels'] / population.loc[1820:1836] * 1_000_000
        ax.plot(per_million.index, per_million.to_numpy(), linestyle='none',
                marker='.', markersize=3, alpha=0.8, color='black')

        # 1837-1919: narrow p05-p95 bar overlaid with a wider p25-p75 bar
        quantile_columns = [f'y_sim_{p}' for p in ('p05', 'p25', 'p75', 'p95')]
        width = plot_common.bar_width
        for year in range(1837, 1919 + 1):
            p05, p25, p75, p95 = df.loc[year, quantile_columns] / population.loc[year] * 1_000_000
            ax.fill_between([year - width / 8, year + width / 8], p05, p95, alpha=0.7, color='k')
            ax.fill_between([year - width / 2, year + width / 2], p25, p75, alpha=0.7, color='k')

        # keep the autoscaled lower limit, cap the upper limit at 100
        ax.set_ylim(ax.get_ylim()[0], 100)
        # no title here: the title is part of the figure caption
        ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator())
        # The y axis keeps the minor locator installed by the log scale:
        # AutoMinorLocator is documented for linear scales only, and on a log
        # axis it warns and yields no minor ticks.
        fig.savefig(output_filename)
    finally:
        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)
    print('saved plot to file:', output_filename)


if __name__ == '__main__':
    # Script entry point: read the output path from the command line and
    # render the figure there.
    cli_args = parser.parse_args()
    make_plot(output_filename=cli_args.output_filename)
